package com.ustcinfo.study.scala.r1.chenzhiyu

import org.apache.spark.{SparkConf, SparkContext}

/**
  * Word-count practice job: prints the ten most frequent words of the sample text.
  *
  * The whole job runs as part of the constructor, so creating an instance
  * (`new scalaPractice`) builds a local Spark context, reads the input file and
  * prints the result. The companion object's `main` is the runnable entry point.
  *
  * @author chen.zhiyu2
  * @email chen.zhiyu2@ustcinfo.com
  * @date 2018/08/04
  */
class scalaPractice {
  // Spark configuration: local master, application name "scalaPractice".
  val sparkConf: SparkConf = new SparkConf().setMaster("local").setAppName("scalaPractice")
  // Context built from the configuration above. It is deliberately left running
  // here so that code holding an instance can keep using `sc`.
  val sc: SparkContext = new SparkContext(sparkConf)
  // Input text, one element per line; the path is relative to the working directory.
  val txtRdd: org.apache.spark.rdd.RDD[String] =
    sc.textFile("src/main/resources/sampleData/programmingGuide")

  txtRdd
    // Split each line on runs of whitespace, flattening all lines into one word stream.
    .flatMap(_.split("\\s+"))
    // Blank lines, leading whitespace and repeated separators produce empty
    // tokens; drop them so "" is never counted as a word.
    .filter(_.nonEmpty)
    // Pair every word with an initial count of 1.
    .map(word => (word, 1))
    // Sum the counts per word.
    .reduceByKey(_ + _)
    // Swap to (count, word) so the count drives the ordering and the printed shape.
    .map(_.swap)
    // Ten largest pairs in descending order; avoids sorting the whole data set.
    .top(10)
    // Print one (count, word) pair per line.
    .foreach(println)
}

/** Entry point for running the word-count job from the command line or an IDE. */
object scalaPractice {

  /**
    * Runs the job by instantiating [[scalaPractice]], then stops the Spark context.
    *
    * @param args command-line arguments (unused)
    */
  def main(args: Array[String]): Unit = {
    val job = new scalaPractice
    // The constructor has already printed the result; release Spark's resources.
    job.sc.stop()
  }
}
